import numpy as np
import pandas as pd
from num2words import num2words
# sklearn
from sklearn import metrics
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.metrics import mean_squared_error, mean_absolute_error
# Pytorch
import torch
from torch.autograd import Variable
import torchvision.transforms as transforms
# Visualisation libraries
## Text
from colorama import Fore, Back, Style
from IPython.display import Image, display, Markdown, Latex, clear_output
## progressbar
import progressbar
## plotly
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
import plotly.offline as py
from plotly.subplots import make_subplots
import plotly.express as px
## seaborn
import seaborn as sns
## matplotlib
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse, Polygon
from matplotlib.font_manager import FontProperties
import matplotlib.colors as mcolors
from matplotlib import cm
# Global matplotlib styling applied to every figure in this notebook.
# NOTE(review): 'seaborn-whitegrid' was renamed 'seaborn-v0_8-whitegrid' in
# matplotlib >= 3.6 -- confirm the installed version still accepts this name.
plt.style.use('seaborn-whitegrid')
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
plt.rcParams['text.color'] = 'k'
# Jupyter cell magic: render figures inline (not valid outside a notebook).
%matplotlib inline
import warnings
# Silence library deprecation chatter in the notebook output.
warnings.filterwarnings("ignore")
A random n-class classification dataset can be generated using sklearn.datasets.make_classification. Here, we generate a dataset with two features and 2000 instances. Moreover, the dataset is generated for multiclass classification with three classes.
# Synthetic data: (n_classes-1)*1e3 = 2000 samples, 2 informative features,
# 3 classes, one cluster per class; random_state fixed for reproducibility.
n_features =2
n_classes = 3
X, y = make_classification(n_samples = int((n_classes-1)*1e3), n_features = n_features, n_redundant=0, n_classes = n_classes,
                           n_informative=2, random_state=1, n_clusters_per_class=1)
# Map numeric class ids to English names, e.g. {0: 'Zero', 1: 'One', 2: 'Two'}.
Labels_dict = dict(zip(list(np.unique(y)), [num2words(x).title() for x in np.unique(y)]))
Data = pd.DataFrame(data = X, columns = ['Feature %i' % (i+1) for i in range(n_features)])
Target = 'Outcome Variable'
# Attach the labels as the outcome column and preview the table.
Data[Target] = y
display(Data)
def Plot_Data(X, y, PD, ax = None):
    """Scatter-plot a two-feature dataset coloured by class label.

    Parameters
    ----------
    X : ndarray of shape (n_samples, 2)
        Feature matrix; columns 0 and 1 are plotted.
    y : ndarray of shape (n_samples,)
        Integer class labels used for the point colours.
    PD : dict
        Plot settings: 'pad', 'FigSize', 'cricle_size' (key name as spelled
        by the callers), 'alpha', 'ColorMap', 'Labels', 'BP', 'grid'.
    ax : matplotlib Axes, optional
        Target axes; a new square figure is created when omitted.
    """
    # Square axis limits: global data range rounded to one significant
    # figure, padded by PD['pad'] on each side.
    lims = float('%1.e' % X.min())-PD['pad'], float('%1.e' % X.max())+PD['pad']
    # Figure
    if ax is None:  # idiom fix: identity comparison instead of `== None`
        fig, ax = plt.subplots(1, 1, figsize=(PD['FigSize'], PD['FigSize']))
    scatter = ax.scatter(X[:,0], X[:,1], s=PD['cricle_size'],
                         c=y, edgecolor = 'Navy', alpha = PD['alpha'], cmap = PD['ColorMap'])
    _ = ax.legend(handles=scatter.legend_elements()[0], labels= PD['Labels'],
                  fancybox=True, framealpha=1, shadow=True, borderpad=PD['BP'], loc='best', fontsize = 14)
    _ = ax.set_xlim(lims)
    _ = ax.set_ylim(lims)
    _ = ax.set_xlabel('Feature 1')
    _ = ax.set_ylabel('Feature 2')
    _ = ax.set_aspect(1)
    _ = ax.grid(PD['grid'])
# Plot settings; 'cricle_size' (sic) is the exact key name Plot_Data reads.
PD = dict(BP = .5, alpha=.7, bg_alpha = 0.25, grid = True, cricle_size = 50,
          FigSize = 7, h=0.02, pad=1, ColorMap = 'Set1', Labels = list(Labels_dict.values()))
Plot_Data(X, y, PD = PD, ax = None)
| Feature 1 | Feature 2 | Outcome Variable | |
|---|---|---|---|
| 0 | 0.421823 | -1.258802 | 2 |
| 1 | 1.174360 | 1.586866 | 0 |
| 2 | -0.444844 | 0.623748 | 2 |
| 3 | 1.286082 | 1.791197 | 0 |
| 4 | 1.050679 | 1.105048 | 0 |
| ... | ... | ... | ... |
| 1995 | -1.929029 | 0.119340 | 2 |
| 1996 | -0.010248 | -0.785788 | 1 |
| 1997 | 1.796874 | 3.145459 | 0 |
| 1998 | 1.656980 | 2.623708 | 0 |
| 1999 | 0.820434 | 0.450676 | 0 |
2000 rows × 3 columns
def DatasetTargetDist(Inp, Target, Labels_dict, PD):
    """Show the distribution of `Inp[Target]` as a count table plus pie chart.

    Parameters
    ----------
    Inp : pd.DataFrame
        Dataset containing the target column.
    Target : str
        Name of the target column in `Inp`; also used as the figure title.
    Labels_dict : dict
        Maps numeric class ids to display names.
    PD : dict
        Plot settings: 'column_widths', 'pull', 'textfont', 'PieColors',
        'hole', 'height', 'legend_title', 'TableColors',
        'tablecolumnwidth', 'title_x', 'title_y'.
    """
    # Table of class counts and percentages.
    # NOTE(review): the {'index': Target} rename relies on the pre-pandas-2.0
    # reset_index() column name -- confirm against the installed version.
    Table = Inp[Target].value_counts().to_frame('Count').reset_index(drop = False).rename(columns = {'index':Target})
    Table[Target] = Table[Target].replace(Labels_dict)
    Table['Percentage'] = np.round(100*(Table['Count']/Table['Count'].sum()),2)
    fig = make_subplots(rows=1, cols=2, horizontal_spacing = 0.02, column_widths=PD['column_widths'],
                        specs=[[{"type": "table"},{"type": "pie"}]])
    # Right: donut chart of class counts.
    fig.add_trace(go.Pie(labels=Table[Target].values, values=Table['Count'].values,
                         pull=PD['pull'], textfont=dict(size= PD['textfont']),
                         marker=dict(colors = PD['PieColors'], line=dict(color='black', width=1))), row=1, col=2)
    fig.update_traces(hole=PD['hole'])
    fig.update_layout(height = PD['height'], legend=dict(orientation="v"), legend_title_text= PD['legend_title'])
    # Left: the same numbers rendered as a plotly table.
    T = Table.copy()
    # '%%%.2f' yields a literal '%' prefix, e.g. '%33.35'.
    T['Percentage'] = T['Percentage'].map(lambda x: '%%%.2f' % x)
    Temp = []
    for i in T.columns:
        Temp.append(T.loc[:,i].values)
    fig.add_trace(go.Table(header=dict(values = list(Table.columns), line_color='darkslategray',
                                       fill_color= PD['TableColors'][0], align=['center','center'],
                                       font=dict(color='white', size=12), height=25), columnwidth = PD['tablecolumnwidth'],
                           cells=dict(values=Temp, line_color='darkslategray',
                                      fill=dict(color= [PD['TableColors'][1], PD['TableColors'][1]]),
                                      align=['center', 'center'], font_size=12, height=20)), 1, 1)
    fig.update_layout(title={'text': '<b>' + Target + '<b>', 'x':PD['title_x'],
                             'y':PD['title_y'], 'xanchor': 'center', 'yanchor': 'top'})
    fig.show()
# Pull every slice out slightly; the last slice is pulled further for emphasis.
_pull = [.01] * (len(Labels_dict) - 1) + [.1]
PD = dict(PieColors=px.colors.sequential.Plasma_r, TableColors=['Navy', 'White'],
          hole=.4, column_widths=[0.6, 0.4], textfont=14, height=350,
          tablecolumnwidth=[0.25, 0.15, 0.15], pull=_pull,
          legend_title=Target, title_x=0.5, title_y=0.8)
del _pull
DatasetTargetDist(Data, Target, Labels_dict, PD)
StratifiedShuffleSplit is a variation of ShuffleSplit which returns stratified splits: each set contains approximately the same percentage of samples of each target class as the complete set.
# Single stratified 70/30 split; both sides keep the overall class ratios.
Test_Size = 0.3
sss = StratifiedShuffleSplit(n_splits=1, test_size=Test_Size, random_state=42)
_ = sss.get_n_splits(X, y)
# n_splits=1, so the generator yields exactly one (train, test) index pair.
train_index, test_index = next(sss.split(X, y))
# DataFrames need label-based .loc; plain arrays take positional indexing.
if isinstance(X, pd.DataFrame):
    X_train, X_test = X.loc[train_index], X.loc[test_index]
else:
    X_train, X_test = X[train_index], X[test_index]
# Series and ndarray targets are both indexed the same way here.
y_train, y_test = y[train_index], y[test_index]
del sss
def Train_Test_Dist(X_train, y_train, X_test, y_test, PD, Labels_dict = Labels_dict):
    """Visualise the train/test split: a shape table plus one pie per set.

    Parameters
    ----------
    X_train, X_test : array-like
        Feature matrices (only their .shape is reported).
    y_train, y_test : array-like of shape (n_samples,)
        Integer class labels; counts are shown per named class.
    PD : dict
        Plot settings: 'column_widths', 'pull', 'textfont', 'PieColors',
        'legend_title', 'TableColors', 'tablecolumnwidth', 'title_x',
        'title_y', 'height' (None to keep the default figure height).
    Labels_dict : dict
        Maps numeric class ids to display names.
    """
    def ToSeries(x):
        # Normalise arrays to pandas Series so .replace/.value_counts apply.
        if not isinstance(x, pd.Series):
            Out = pd.Series(x)
        else:
            Out = x.copy()
        return Out
    fig = make_subplots(rows=1, cols=3, horizontal_spacing = 0.02, column_widths= PD['column_widths'],
                        specs=[[{"type": "table"},{'type':'domain'}, {'type':'domain'}]])
    # Right: class-distribution pies (train in col 2, test in col 3).
    C = 2
    for y in [ToSeries(y_train).replace(Labels_dict), ToSeries(y_test).replace(Labels_dict)]:
        # BUG FIX: value_counts() orders by count, not by label, so the raw
        # .values were paired with the wrong labels whenever the count order
        # differed from the Labels_dict order. Re-index into label order.
        counts = y.value_counts().reindex(list(Labels_dict.values()))
        fig.add_trace(go.Pie(labels= list(Labels_dict.values()),
                             values= counts.values, pull=PD['pull'],
                             textfont=dict(size=PD['textfont']),
                             marker=dict(colors = PD['PieColors'],
                                         line=dict(color='black', width=1))), row=1, col=C)
        fig.update_traces(hole=.5)
        fig.update_layout(legend=dict(orientation="v"), legend_title_text= PD['legend_title'])
        C+=1
    # Left: table of the four split shapes.
    Table = pd.DataFrame(data={'Set':['X_train','X_test','y_train','y_test'],
                               'Shape':[X_train.shape, X_test.shape, y_train.shape, y_test.shape]}).astype(str)
    T = Table.copy()
    Temp = []
    for i in T.columns:
        Temp.append(T.loc[:,i].values)
    TableColors = PD['TableColors']
    fig.add_trace(go.Table(header=dict(values = list(Table.columns), line_color='darkslategray',
                                       fill_color= TableColors[0], align=['center','center'],
                                       font=dict(color='white', size=12), height=25), columnwidth = PD['tablecolumnwidth'],
                           cells=dict(values=Temp, line_color='darkslategray',
                                      fill=dict(color= [TableColors[1], TableColors[1]]),
                                      align=['center', 'center'], font_size=12, height=20)), 1, 1)
    fig.update_layout(title={'text': '<b>' + 'Dataset Distribution' + '<b>', 'x':PD['title_x'],
                             'y':PD['title_y'], 'xanchor': 'center', 'yanchor': 'top'})
    if PD['height'] is not None:  # idiom fix: identity comparison with None
        fig.update_layout(height = PD['height'])
    fig.show()
# Reuse PD from the previous cell, overriding only the layout-specific keys.
PD.update(dict(column_widths=[0.3, 0.3, 0.3], tablecolumnwidth = [0.2, 0.4], height = 350, legend_title = Target))
Train_Test_Dist(X_train, y_train, X_test, y_test, PD)
Multinomial logistic regression is a classification method that generalizes logistic regression to multiclass problems.
def TorchSets(Set):
    """Convert a numpy array or pandas object into a torch Variable.

    1-D inputs (class-label vectors) are cast to LongTensor; higher-rank
    inputs keep their numpy dtype. The tensor is moved to the GPU when
    CUDA is available.

    Parameters
    ----------
    Set : np.ndarray, pd.DataFrame or pd.Series

    Returns
    -------
    torch.autograd.Variable
    """
    # Pandas containers are unwrapped to their underlying ndarray first.
    if isinstance(Set, (pd.DataFrame, pd.Series)):
        Set = Set.values
    tensor = torch.from_numpy(Set)
    # Label vectors must be integer (Long) for CrossEntropyLoss-style use.
    if Set.ndim == 1:
        tensor = tensor.type(torch.LongTensor)
    # GPU when available, otherwise stay on CPU.
    if torch.cuda.is_available():
        tensor = tensor.cuda()
    return Variable(tensor)
# Tensors: move the numpy splits onto torch (GPU when available).
X_train_tensor = TorchSets(X_train)
y_train_tensor = TorchSets(y_train)
X_test_tensor = TorchSets(X_test)
y_test_tensor = TorchSets(y_test)
Batch_size = 100
iteration_number = int(5e3)
# Epochs so that (batches per epoch) * epochs_number ~= iteration_number.
epochs_number = int(iteration_number / (len(X_train) / Batch_size))
# Pytorch train and test sets
Train_set = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
Test_set = torch.utils.data.TensorDataset(X_test_tensor, y_test_tensor)
# data loader
train_loader = torch.utils.data.DataLoader(Train_set, batch_size = Batch_size, shuffle = False)
# BUG FIX: the test loader previously wrapped Train_set, so every "test"
# evaluation in the training loop actually measured training-set accuracy.
test_loader = torch.utils.data.DataLoader(Test_set, batch_size = Batch_size, shuffle = False)
class MLP_Model(torch.nn.Module):
    """Three-layer perceptron: input -> hidden -> hidden/4 -> softmax output.

    The two hidden layers use ReLU activations with He (Kaiming-uniform)
    weight initialisation; the output layer emits class probabilities via
    a softmax over dim=1.
    """

    def __init__(self, input_Size, hidden_Size, output_Size):
        super(MLP_Model, self).__init__()
        # Layer 1: input -> hidden, He-initialised for the ReLU that follows.
        self.fc1 = torch.nn.Linear(input_Size, hidden_Size)
        torch.nn.init.kaiming_uniform_(self.fc1.weight, nonlinearity='relu')
        self.act1 = torch.nn.ReLU()
        # Layer 2: hidden -> hidden/4, again He-initialised.
        self.fc2 = torch.nn.Linear(hidden_Size, int(hidden_Size/4))
        torch.nn.init.kaiming_uniform_(self.fc2.weight, nonlinearity='relu')
        self.act2 = torch.nn.ReLU()
        # Output layer: hidden/4 -> classes, probabilities via softmax.
        self.fc3 = torch.nn.Linear(int(hidden_Size/4), output_Size)
        torch.nn.init.kaiming_uniform_(self.fc3.weight)
        self.act3 = torch.nn.Softmax(dim=1)

    def forward(self, x):
        """Return per-class probabilities of shape (batch, output_Size)."""
        hidden = self.act1(self.fc1(x))
        hidden = self.act2(self.fc2(hidden))
        return self.act3(self.fc3(hidden))
class LogisticRegressionModel(torch.nn.Module):
    """Multinomial logistic regression: one linear layer emitting raw
    class scores (the softmax is applied implicitly by CrossEntropyLoss)."""

    def __init__(self, input_Size, output_Size):
        super(LogisticRegressionModel, self).__init__()
        self.linear = torch.nn.Linear(input_Size, output_Size)

    def forward(self, x):
        """Return logits of shape (batch, output_Size)."""
        return self.linear(x)
def Plot_history(history, Table_Rows = 25, yLim = 2):
    """Plot the loss/accuracy training curves next to a sampled history table.

    Parameters
    ----------
    history : pd.DataFrame
        Columns 'Iteration', 'Loss', 'Accuracy'.
    Table_Rows : int
        Number of evenly spaced history rows shown in the table.
    yLim : float
        Upper y-axis limit for the curves panel.
    """
    fig = make_subplots(rows=1, cols=2, horizontal_spacing = 0.02, column_widths=[0.6, 0.4],
                        specs=[[{"type": "scatter"},{"type": "table"}]])
    # Left: loss and accuracy vs. iteration.
    fig.add_trace(go.Scatter(x= history['Iteration'].values, y= history['Loss'].astype(float).values.round(4),
                             line=dict(color='OrangeRed', width= 1.5), name = 'Loss'), 1, 1)
    fig.add_trace(go.Scatter(x= history['Iteration'].values, y= history['Accuracy'].astype(float).values,
                             line=dict(color='MidnightBlue', width= 1.5), name = 'Accuracy'), 1, 1)
    fig.update_layout(legend=dict(x=0, y=1.1, traceorder='reversed', font_size=12),
                      dragmode='select', plot_bgcolor= 'white', height=600, hovermode='closest',
                      legend_orientation='h')
    fig.update_xaxes(range=[history.Iteration.min(), history.Iteration.max()],
                     showgrid=True, gridwidth=1, gridcolor='Lightgray',
                     showline=True, linewidth=1, linecolor='Lightgray', mirror=True, row=1, col=1)
    fig.update_yaxes(range=[0, yLim], showgrid=True, gridwidth=1, gridcolor='Lightgray',
                     showline=True, linewidth=1, linecolor='Lightgray', mirror=True, row=1, col=1)
    # Right: table of Table_Rows evenly spaced history snapshots.
    ind = np.linspace(0, history.index[-1], Table_Rows, endpoint = True).round(0).astype(int)
    # BUG FIX: .copy() avoids chained assignment on a slice of `history`
    # (SettingWithCopyWarning; the formatting below could silently be lost
    # or leak back into the caller's frame).
    T = history[history.index.isin(ind)].copy()
    T[['Loss','Accuracy']] = T[['Loss','Accuracy']].applymap(lambda x: '%.4e' % x)
    Temp = [T.loc[:, col].values for col in T.columns]
    fig.add_trace(go.Table(header=dict(values = list(history.columns), line_color='darkslategray',
                                       fill_color='Navy', align=['center','center'],
                                       font=dict(color='white', size=12), height=25), columnwidth = [0.4, 0.4, 0.4],
                           cells=dict(values=Temp, line_color='darkslategray',
                                      fill=dict(color=['Lavender', 'white', 'white']),
                                      align=['center', 'center'], font_size=12,height=20)), 1, 2)
    fig.show()
Fitting the model
# Model dimensions: 2 input features, one output unit per class.
input_Size, output_Size = n_features, len(Labels_dict)
hidden_Size = 256
# model
model = LogisticRegressionModel(input_Size, output_Size)
# GPU
if torch.cuda.is_available():
    model.cuda()
# Cross Entropy Loss (expects the raw logits the model produces)
criterion= torch.nn.CrossEntropyLoss()
# Optimizer
optimizer = torch.optim.SGD(model.parameters(), lr= 1e-2, momentum=.9)
# Training the Model: accumulators for the logged metrics.
Count = 0
Loss_list = []
Iteration_list = []
Accuracy_list = []
# NOTE(review): MSE_list/MAE_list are never filled by the loop below --
# apparently dead state; confirm before removing.
MSE_list = []
MAE_list = []
# Evaluate/log every `Steps` iterations.
Steps = 10
Progress_Bar = progressbar.ProgressBar(maxval= iteration_number + 200,
                                       widgets=[progressbar.Bar('=', '|', '|'),
                                                progressbar.Percentage()])
# print('---------------------------------------------------------')
# Mini-batch SGD training loop with periodic accuracy evaluation.
for epoch in range(epochs_number):
    for i, (Xtr, ytr) in enumerate(train_loader):
        # Variables
        Xtr = Variable(Xtr.view(-1, n_features))
        ytr = Variable(ytr)
        # Set all gradients to zero
        optimizer.zero_grad()
        # Forward
        Out = model(Xtr.float())
        # loss
        loss = criterion(Out, ytr.long())
        # Backward (Calculating the gradients)
        loss.backward()
        # Update parameters
        optimizer.step()
        Count += 1
        del Xtr, ytr
        # Predictions: evaluate on test_loader every `Steps` iterations.
        if Count % Steps == 0:
            # Calculate Accuracy
            Correct, Total = 0, 0
            # Predictions
            for Xts, yts in test_loader:
                Xts = Variable(Xts.view(-1, n_features))
                # Forward
                Out = model(Xts.float())
                # The maximum value of Out -> predicted class index
                Predicted = torch.max(Out.data, 1)[1]
                # Total number of yts
                Total += len(yts)
                # Total Correct predictions
                Correct += (Predicted == yts).sum()
                del Xts, yts
            # storing loss and iteration
            Loss_list.append(loss.data)
            Iteration_list.append(Count)
            Accuracy_list.append(Correct / float(Total))
            Progress_Bar.update(Count)
Progress_Bar.finish()
# Collect the logged metrics into a DataFrame (tensors -> numpy scalars).
history = pd.DataFrame({'Iteration': np.array(Iteration_list),
                        'Loss': np.array([x.cpu().data.numpy() for x in Loss_list]),
                        'Accuracy': np.array([x.cpu().data.numpy() for x in Accuracy_list])})
del Loss_list, Iteration_list, Accuracy_list
|=========================================================================|100%
Model Performance
# Interactive loss/accuracy curves plus a 30-row sampled history table.
Plot_history(history, Table_Rows = 30, yLim = 1.4)
def Plot_Classification(Model, X, y, PD, ax = None):
    """Draw the model's decision regions with the data points overlaid.

    Parameters
    ----------
    Model : torch.nn.Module
        Classifier; the argmax over its outputs gives the predicted class.
    X : ndarray of shape (n_samples, 2)
        Feature matrix.
    y : ndarray of shape (n_samples,)
        Integer class labels for the scatter colours.
    PD : dict
        Plot settings: 'pad', 'h' (mesh step), 'FigSize', 'ColorMap',
        'bg_alpha', 'cricle_size' (key name as spelled by the callers),
        'alpha', 'Labels', 'BP', 'grid'.
    ax : matplotlib Axes, optional
        Target axes; a new square figure is created when omitted.
    """
    # Padded square limits; both axes deliberately share the global X range.
    x_min, x_max = float('%1.e' % X.min())-PD['pad'], float('%1.e' % X.max())+PD['pad']
    y_min, y_max = x_min, x_max
    # Dense grid over the plane, pushed through the model for predictions.
    xx, yy = np.meshgrid(np.arange(x_min, x_max, PD['h']), np.arange(y_min, y_max, PD['h']))
    Temp = TorchSets(np.c_[xx.ravel(), yy.ravel()])
    # Predicted class = argmax over model outputs, pulled back to numpy.
    Pred = Model(Temp.float())
    Pred = torch.max(Pred.data, 1)[1]
    Pred = Pred.cpu().data.numpy().reshape(xx.shape)
    # Figure
    if ax is None:  # idiom fix: identity comparison instead of `== None`
        fig, ax = plt.subplots(1, 1, figsize=(PD['FigSize'], PD['FigSize']))
    _ = ax.contourf(xx, yy, Pred, cmap = PD['ColorMap'], alpha=PD['bg_alpha'])
    scatter = ax.scatter(X[:,0], X[:,1], s=PD['cricle_size'],
                         c=y, edgecolor = 'Black', linewidths = 0.5, alpha = PD['alpha'], cmap = PD['ColorMap'])
    _ = ax.legend(handles=scatter.legend_elements()[0], labels= PD['Labels'],
                  fancybox=True, framealpha=1, shadow=True, borderpad=PD['BP'], loc='best', fontsize = 14)
    _ = ax.set_xlim(x_min, x_max)
    _ = ax.set_ylim(y_min, y_max)
    _ = ax.set_xlabel('Feature 1')
    _ = ax.set_ylabel('Feature 2')
    _ = ax.set_aspect(1)
    _ = ax.grid(PD['grid'])
#
# Shared plot settings for the decision-region figures ('cricle_size' is
# the exact key name Plot_Classification reads).
PD = dict(BP = .5, alpha=.7, bg_alpha = 0.15, grid = False, cricle_size = 50,
          FigSize = 7, h=0.02, pad=1, ColorMap = 'Set1', Labels = list(Labels_dict.values()))
fig, ax = plt.subplots(1, 2, figsize=(16, 7))
# Train Set
Plot_Classification(model, X_train, y_train, PD = PD, ax = ax[0])
_ = ax[0].set_title('Train Set', fontsize = 16)
# Test Set
Plot_Classification(model, X_test, y_test, PD = PD, ax = ax[1])
_ = ax[1].set_title('Test Set', fontsize = 16)
The confusion matrix allows visualization of the performance of an algorithm. Note that, due to the size of the data, we do not provide a cross-validation evaluation here. In general, that type of evaluation is preferred.
def Confusion_Mat(CM_Train, CM_Test, PD, n_splits = 10):
    """Draw raw and row-normalised confusion-matrix heatmaps for both sets.

    Parameters
    ----------
    CM_Train, CM_Test : ndarray of shape (n_classes, n_classes)
        Confusion matrices (rows = true labels, columns = predictions).
    PD : dict
        Plot settings: 'FS' (figsize), 'annot_kws' (annotation font size),
        'shrink' (colorbar scale), 'Labels' (tick labels).
    n_splits : int or None
        Cross-validation fold count shown in the titles; None hides it.
    """
    if n_splits is None:  # idiom fix: identity comparison instead of `== None`
        Titles = ['Train Set', 'Test Set']
    else:
        Titles = ['Train Set (CV = % i)' % n_splits, 'Test Set (CV = % i)' % n_splits]
    CM = [CM_Train, CM_Test]
    # Colormap pairs: (raw, normalised) for train, then for test.
    Cmap = ['Greens', 'YlGn','Blues', 'PuBu']
    for i in range(2):
        fig, ax = plt.subplots(1, 2, figsize= PD['FS'])
        fig.suptitle(Titles[i], weight = 'bold', fontsize = 16)
        _ = sns.heatmap(CM[i], annot=True, annot_kws={"size": PD['annot_kws']}, cmap=Cmap[2*i], ax = ax[0],
                        linewidths = 0.2, cbar_kws={"shrink": PD['shrink']})
        _ = ax[0].set_title('Confusion Matrix')
        # Row-normalise so each true-label row sums to 1.
        # NOTE(review): a class with zero true samples would divide by zero.
        Temp = np.round(CM[i].astype('float') / CM[i].sum(axis=1)[:, np.newaxis], 2)
        _ = sns.heatmap(Temp,
                        annot=True, annot_kws={"size": PD['annot_kws']}, cmap=Cmap[2*i+1], ax = ax[1],
                        linewidths = 0.4, vmin=0, vmax=1, cbar_kws={"shrink": PD['shrink']})
        _ = ax[1].set_title('Normalized Confusion Matrix')
        for a in ax:
            _ = a.set_xlabel('Predicted labels')
            _ = a.set_ylabel('True labels')
            _ = a.xaxis.set_ticklabels(PD['Labels'])
            _ = a.yaxis.set_ticklabels(PD['Labels'])
            _ = a.set_aspect(1)
# Train
# Predicted class per training sample (argmax of model outputs, on CPU).
y_pred = model(X_train_tensor.float())
y_pred = torch.max(y_pred.data, 1)[1]
y_pred = y_pred.cpu().data.numpy()
Reports_Train = pd.DataFrame(metrics.classification_report(y_train, y_pred, target_names=list(Labels_dict.values()),
                                                           output_dict=True)).T
CM_Train = metrics.confusion_matrix(y_train, y_pred)
# Test
y_pred = model(X_test_tensor.float())
y_pred = torch.max(y_pred.data, 1)[1]
y_pred = y_pred.cpu().data.numpy()
Reports_Test = pd.DataFrame(metrics.classification_report(y_test, y_pred, target_names=list(Labels_dict.values()),
                                                          output_dict=True)).T
CM_Test = metrics.confusion_matrix(y_test, y_pred)
# Per-class precision/recall/f1 tables, styled for the notebook.
Reports_Train = Reports_Train.reset_index().rename(columns ={'index': 'Train Set'})
Reports_Test = Reports_Test.reset_index().rename(columns ={'index': 'Test Set'})
# NOTE(review): Styler.hide_index() was removed in pandas 2.0 (use .hide());
# confirm against the installed pandas version.
display(Reports_Train.style.hide_index().set_properties(**{'background-color': 'HoneyDew', 'color': 'Black'}).\
        set_properties(subset=['Train Set'], **{'background-color': 'SeaGreen', 'color': 'White'}))
display(Reports_Test.style.hide_index().set_properties(**{'background-color': 'Azure', 'color': 'Black'}).\
        set_properties(subset=['Test Set'], **{'background-color': 'RoyalBlue', 'color': 'White'}))
PD = dict(FS = (14, 6), annot_kws = 14, shrink = .6, Labels = list(Labels_dict.values()))
Confusion_Mat(CM_Train, CM_Test, PD = PD, n_splits = None)
| Train Set | precision | recall | f1-score | support |
|---|---|---|---|---|
| Zero | 0.924644 | 0.972163 | 0.947808 | 467.000000 |
| One | 0.821351 | 0.802128 | 0.811625 | 470.000000 |
| Two | 0.773333 | 0.751620 | 0.762322 | 463.000000 |
| accuracy | 0.842143 | 0.842143 | 0.842143 | 0.842143 |
| macro avg | 0.839776 | 0.841970 | 0.840585 | 1400.000000 |
| weighted avg | 0.839926 | 0.842143 | 0.840747 | 1400.000000 |
| Test Set | precision | recall | f1-score | support |
|---|---|---|---|---|
| Zero | 0.918269 | 0.955000 | 0.936275 | 200.000000 |
| One | 0.833333 | 0.820896 | 0.827068 | 201.000000 |
| Two | 0.773196 | 0.753769 | 0.763359 | 199.000000 |
| accuracy | 0.843333 | 0.843333 | 0.843333 | 0.843333 |
| macro avg | 0.841599 | 0.843221 | 0.842234 | 600.000000 |
| weighted avg | 0.841700 | 0.843333 | 0.842340 | 600.000000 |